import re
import nltk
from collections import Counter
from nltk.corpus import stopwords
# Load data
def load_data(filePath):
    """Read the file at *filePath* as UTF-8 text and return its full contents."""
    with open(filePath, encoding="utf-8") as source:
        return source.read()
# Initial text preprocessing
def deal_data(cont):
    """Normalize raw text for tokenization.

    Currently this only lowercases the text so that counting is
    case-insensitive; tokenization itself is handled by devide_data().

    :param cont: raw text string
    :return: lowercased copy of *cont*
    """
    # NOTE: previously commented-out newline/space stripping was removed;
    # nltk.word_tokenize handles whitespace on its own.
    return cont.lower()
# Tokenization
def devide_data(cont):
    """Split *cont* into word tokens using NLTK's default tokenizer.

    :param cont: preprocessed text string
    :return: list of token strings
    """
    return nltk.word_tokenize(cont)

# Count word frequencies
def count_data(cont_list, top_n=100):
    """Count token frequencies, ignoring English stopwords and 1-char tokens.

    Prints the *top_n* most common (word, count) pairs and also returns them,
    so callers can use the result programmatically.

    :param cont_list: iterable of token strings (e.g. from devide_data)
    :param top_n: how many of the most common words to report (default 100)
    :return: list of (word, count) tuples, most frequent first
    """
    # Build a set once: O(1) membership tests instead of O(n) list scans
    # per token. (Also drops the old debug print of the full stopword list.)
    stop_words = set(stopwords.words('english'))
    counts = Counter(
        tok for tok in cont_list
        if len(tok) > 1 and tok not in stop_words
    )
    most_common = counts.most_common(top_n)
    print(most_common)  # kept: the __main__ driver relies on this output
    return most_common
if __name__ == "__main__":
    content = load_data("./one.txt")
    content = deal_data(content)
    content_list = devide_data(content)
    count_data(content_list)

